
**********
* Readme *
**********

* This script
* [1] downloads the zipped .txt microdata files of the PNAD Continua (2017q1 to 2018q4) from IBGE and unzips them;
* [2] reads the data using the Datazoom command datazoom_pnadcontinua;
* [3] and stacks the resulting panel files into a single .dta file.


* Root folder (PATH TO BE DEFINED BY THE USER)
**********************************************
* The path is stored in the global ${analysis}, which the cd / log / save
* commands of this script expand.
clear all
global analysis "C:/***/replication_package"


* Timestamped log
*****************
* Close any log left open by an earlier, interrupted run; otherwise
* -log using- below stops with r(604) "log file already open".
* -capture- makes this a no-op when no log is open.
capture log close

* ${today} holds the current date formatted as YYMMDD
global today = strofreal(date(c(current_date), "DMY"), "%tdYYNNDD")
log using "${analysis}/code/logs/1_2_read_pnad_microdata_${today}.smcl", replace


***************************
* Download and unzip data *
***************************

* Set working directory (the zip files are downloaded into it)
cd "${analysis}/data/source_files/pnad"

* Download source data
* One zip file per quarter, stored on the IBGE server under <base>/<year>/ and
* named PNADC_0<quarter><year>_<release>.zip.
local ibge_base "https://ftp.ibge.gov.br/Trabalho_e_Rendimento/Pesquisa_Nacional_por_Amostra_de_Domicilios_continua/Trimestral/Microdados"
* Release stamp that is part of every file name on the server
* NOTE(review): presumably changes when IBGE republishes the files - confirm and update if the download fails
local release "20220916"

forvalues year = 2017/2018 {
  forvalues quarter = 1/4 {
    local zip_name "PNADC_0`quarter'`year'_`release'.zip"
    * -replace- lets the script be re-run: without it -copy- stops with
    * r(602) as soon as a zip file from a previous run already exists
    copy "`ibge_base'/`year'/`zip_name'" "`zip_name'", replace
  }
}

* Unzip all zip files found in the working directory
local files_to_unzip : dir . files "*.zip"

foreach file of local files_to_unzip {
  * Quoted so that a file name containing blanks is passed as a single argument
  unzipfile "`file'", replace
}


****************************
* Read files with Datazoom *
****************************

* Note 1: The option "idrs" uses the Ribas and Soares algorithm to match individuals over different waves
* Note 2: original() and saving() are built from the root folder ${analysis}, i.e. the same
*         directory the microdata was downloaded and unzipped to; no separate path has to be edited
* Note 3: datazoom_pnadcontinua is not part of official Stata and must be installed beforehand

datazoom_pnadcontinua, years( 2017 2018 ) ///
    original(${analysis}/data/source_files/pnad) ///
    saving(${analysis}/data/source_files/pnad) idrs

    
******************
* Stack the data *
******************

* Start from an empty dataset so that -append- only stacks the panel files
clear

* Set working directory
cd "${analysis}/data/source_files/pnad/pnadcontinua"

* Stack panels
* NOTE(review): panels 5 to 7 are presumably the ones covering 2017-2018 - confirm against the Datazoom output
foreach panel of numlist 5/7 {
  capture append using PNAD_painel_`panel'_rs
  local rc = _rc
  if `rc' == 601 {
    * A missing panel file is tolerated (best effort), but reported
    display as text "Note: PNAD_painel_`panel'_rs.dta not found - skipped"
  }
  else if `rc' != 0 {
    * Any other failure would silently drop a whole panel from the output, so stop
    display as error "append of PNAD_painel_`panel'_rs failed"
    error `rc'
  }
}

* Stop with a clear message when no panel file could be read at all
if _N == 0 {
  display as error "none of the panel files PNAD_painel_5_rs to PNAD_painel_7_rs could be appended"
  exit 601
}

* Check which survey years are present in the stacked data
tab Ano, missing


* Set missing values
********************
* Each variable below uses a numeric code that is recoded to Stata's missing value (.)

* Code 99
foreach v in V2008 V20081 {
  replace `v' = . if `v' == 99
}

* Code 9999
replace V20082 = . if V20082 == 9999

* Code 9
replace V2010 = . if V2010 == 9


* Export
********

* Restrict the sample to the survey years 2017 and 2018
drop if !inlist(Ano, 2017, 2018)

* Reduce storage types before writing the file to disk
quietly compress
save "${analysis}/data/1_2_pnad.dta", replace


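* Housekeeping (optional - disabled by default; uncomment to delete the intermediate files)
*******************************************************************************************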

* * Set working directory
* cd "${analysis}/data/source_files/pnad/pnadcontinua"
* 
* local files_to_delete : dir . files "*.dta"
* 
* foreach file of local files_to_delete {
*   erase `file'
* }
* 
* * Set working directory
* cd "${analysis}/data/source_files/pnad"
* 
* local files_to_delete : dir . files "*.txt"
* 
* foreach file of local files_to_delete {
*   erase `file'
* }
* 
* clear
* rmdir pnadcontinua


* End of script
***************
* Close the log file; -capture- suppresses the error if no log is open
capture log close